%% Michael D. Konig, Andrei Levchenko, Tim Rogers and Fabrizio Zilibotti
%% Aggregate fluctuations in adaptive production networks
%% Forthcoming in Proceedings of the National Academy of Sciences, 2022
 
%% Replication file (Matlab) for estimating the attachment kernels. Used as input for "impulse_response.m" and "autarky.m".

%% Requirements: 

% "entry_exit_times_us_manuf.csv" -> CSV file with the following colums (US manufacturing firms only): firm_factset_id; entry_time; exit_time; censored
% "entry_exit_times_us.csv" -> CSV file with the following colums (US firms only): firm_factset_id; entry_time; exit_time; censored
% "entry_exit_times_manuf.csv" -> CSV file with the following colums (manufacturing firms only): firm_factset_id; entry_time; exit_time; censored
% "entry_exit_times.csv" -> CSV file with the following colums (full sample): firm_factset_id; entry_time; exit_time; censored
% "in_deg_years_us_manuf.mat" -> Matlab data file with firms' in-degrees/number of suppliers (US manufacturing firms only).
% "in_deg_years_us.mat" -> Matlab data file with firms' in-degrees/number of suppliers (US firms only).
% "in_deg_years_manuf.mat" -> Matlab data file with firms' in-degrees/number of suppliers (manufacturing firms only).
% "in_deg_years.mat" -> Matlab data file with firms' in-degrees/number of suppliers (full sample).
% "out_deg_years_us_manuf.mat" -> Matlab data file with firms' out-degrees/number of customers (US manufacturing firms only).
% "out_deg_years_us.mat -> Matlab data file with firms' out-degrees/number of customers (US firms only).
% "out_deg_years_manuf.mat -> Matlab data file with firms' out-degrees/number of customers (manufacturing firms only).
% "out_deg_years.mat -> Matlab data file with firms' out-degrees/number of customers (full samlpe).
% "firm_factset_ids_us_manuf.csv" -> Auxiliary CSV file for mapping firm ids across data files with the following colums (US manufacturing firms only): id; firm_factset_id 
% "firm_factset_ids_us.csv" -> Auxiliary CSV file for mapping firm ids across data files with the following colums (US firms only): id; firm_factset_id 
% "firm_factset_ids_manuf.csv" -> Auxiliary CSV file for mapping firm ids across data files with the following colums (manufacturing firms only): id; firm_factset_id 
% "firm_factset_ids.csv" -> Auxiliary CSV file for mapping firm ids across data files with the following colums (full sample): id; firm_factset_id 

clear all
close all

%% Parameters.
drop_non_us = false;
drop_non_manuf = false;
theta_0 = 0.1; % Initial value of eta for optimization algorithm.
lb = 0; % Lower bound for bounded optimization algorithm.
ub = [];% Upper bound for bounded optimization algorithm.

opt_alg = 'fmincon'; % choose from: 'nm' 'fmincon' 'fminunc' 'sa'
global options_fmincon;
global options_QP;
global options_NM;
global options_fminunc;
global options_SA;
options_fmincon = optimset('Display','off'); 
options_QP = optimset('Display','off'); 
options_NM = optimset('MaxIter', 1e10, 'TolX', 1e-5,'Display','off');
options_SA = optimset('MaxIter', 1e10, 'TolX', 1e-5,'Display','off');
options_fminunc = optimset('MaxIter', 1e10, 'TolX', 1e-5,'Display','off');

%% Load entry and exit times. 
disp(['Load entry and exit times...'])
if(drop_non_us)
    if(drop_non_manuf)
        filename =  strcat('Data/entry_exit_times_us_manuf.csv'); % firm_factset_id; entry_time; exit_time; censored
    else
        filename =  strcat('Data/entry_exit_times_us.csv'); % firm_factset_id; entry_time; exit_time; censored
    end
else
    if(drop_non_manuf)
        filename =  strcat('Data/entry_exit_times_manuf.csv'); % firm_factset_id; entry_time; exit_time; censored
    else
        filename =  strcat('Data/entry_exit_times.csv'); % firm_factset_id; entry_time; exit_time; censored
    end
end
fid = fopen(filename);
dat = textscan(fid,repmat('%s ',1,4), 'delimiter', ';', 'headerlines', 1);
fclose(fid);
entry_exit_firm_factset_id = str2double(strtrim(dat{1}));
entry_time = str2double(strtrim(dat{2}));
exit_time = str2double(strtrim(dat{3}));
clear('dat')

%% Load in-degrees.
disp(['Load in-degrees...'])
if(drop_non_us)
    if(drop_non_manuf)
        load('./Data/in_deg_years_us_manuf.mat')
    else
        load('./Data/in_deg_years_us.mat')
    end
else
    if(drop_non_manuf)
        load('./Data/in_deg_years_manuf.mat')
    else
        load('./Data/in_deg_years.mat')
    end
end 

%% Compute the number of firms in each year. The total number of firms is the number of entering firms less the number of exiting firms.
years = min(entry_time):1:max(exit_time);
num_firms = [];
for s=1:length(years)
    num_firms_entering = sum(entry_time==years(s));
    num_firms_exiting = sum(exit_time==years(s));
    if(s==1)
        num_firms = [years(s) num_firms_entering-num_firms_exiting];
    else
        num_firms = [num_firms; [years(s) num_firms(s-1,2)+num_firms_entering-num_firms_exiting]];
    end
end

%% Load out-degrees.
disp(['Load out-degrees...'])
if(drop_non_us)
    if(drop_non_manuf)
        load('./Data/out_deg_years_us_manuf.mat')
    else
        load('./Data/out_deg_years_us.mat')
    end
else
    if(drop_non_manuf)
        load('./Data/out_deg_years_manuf.mat')
    else
        load('./Data/out_deg_years.mat')
    end
end 
%out_deg_years(:,1) = [];

%% Load firm id mappings data.
if(drop_non_us)
    if(drop_non_manuf)
        fid = fopen(['Data/firm_factset_ids_us_manuf.csv']);
    else
        fid = fopen(['Data/firm_factset_ids_us.csv']);
    end
else
    if(drop_non_manuf)
        fid = fopen(['Data/firm_factset_ids_manuf.csv']);
    else
        fid = fopen(['Data/firm_factset_ids.csv']);
    end
end
dat = textscan(fid,repmat('%s ',1,2), 'delimiter', ';', 'headerlines', 1);
fclose(fid);
factset_firm_id = str2double(strtrim(dat{2}));
clear('dat')

%% Compute maximum likelihood fitness estimates for each firm.
disp(['Compute maximum likelihood fitness estimates for each firm...'])
mle_eta = [];
num_iterations = length(factset_firm_id);
increment = floor(num_iterations/50);
for i=1:length(factset_firm_id)
    
    %% Construct data for each firm.
    ind_firm = find(entry_exit_firm_factset_id==factset_firm_id(i));
    if(~isempty(ind_firm))

        %% Generate data for each firm.
        years = entry_time(ind_firm):1:exit_time(ind_firm); % Years of existence of the firm.
        out_deg = zeros(length(years),1); % Out-degree over the years of existence of the firm.
        num_incumbents = zeros(length(years),1); % Number of incumbent firms.
        num_links = zeros(length(years),1); % Number of links created by firms entering every year.
        for s=1:length(years)
            ind_year = find(out_deg_years(:,1)==years(s));
            if(~isempty(ind_year))
                out_deg(s) = out_deg_years(ind_year,1+i);
                ind_year = find(num_firms(:,1)==years(s));
                if(~isempty(ind_year))
                    num_incumbents(s) = num_firms(ind_year,2);
                end
                ind_year = find(in_deg_years(:,1)==years(s));
                if(~isempty(ind_year))
                    num_links(s) = sum(in_deg_years(ind_year,2:end));
                end
            end
        end

        out_deg = out_deg(1:end-1);
        num_incumbents = num_incumbents(1:end-1);
        num_links = num_links(1:end-1);

        %% MLE.
        switch opt_alg
            
            case 'fmincon'
                
                %disp(['using a constrained optimization algorithm (fmincon)...'])
                [theta_opt,fval,exitflag,output,lambda,grad,hessian] = fmincon(@(theta) -fitness_log_likelihood(theta,out_deg,num_incumbents,num_links),theta_0,[],[],[],[],lb,ub,[],options_fmincon);
                
            case 'fminunc'
                
                %disp(['using an unconstrained optimization algorithm (fminunc)...'])
                [theta_opt,fval,exitflag,output,grad,hessian] = fminunc(@(theta) -fitness_log_likelihood(theta,out_deg,num_incumbents,num_links),theta_0);%,options_fminunc);
                
            case 'nm'
                
                %disp(['using an unconstrained Nelder-Mead algorithm...'])
                [theta_opt,fval] = fminsearch(@(theta) -fitness_log_likelihood(theta,out_deg,num_incumbents,num_links),theta_0);%,options_NM);
                
            case 'sa'
                
                %disp(['using a constrained simulated annealing algorithm...'])
                [theta_opt,fval,exitFlag,out] = simulannealbnd(@(theta) -fitness_log_likelihood(theta,out_deg,num_incumbents,num_links),theta_0,lb,ub);%,options_SA);
                
            otherwise
                
                error('No optimization algorithm specified.')
        end

        %% Set eta to zero for those firms that have zero out-degree accross all years.
        if(sum(out_deg_years(:,1+ind_firm))==0)
            theta_opt = 0;
        end

        mle_eta = [mle_eta; [factset_firm_id(i) theta_opt]];
        
    else

        mle_eta = [mle_eta; [factset_firm_id(i) 0]];

    end
    
    %% Display progress of the for loop.
    if(mod(i,increment)==0)
        fprintf('Progress...%.0f%%\n',round(100*i/num_iterations));
    end

end

%% Save the estimated fitness values to a file.
if(drop_non_us)
    if(drop_non_manuf)
        save('./Data/mle_eta_us_manuf.mat','mle_eta')
    else
        save('./Data/mle_eta_us.mat','mle_eta')
    end
else
    if(drop_non_manuf)
        save('./Data/mle_eta_manuf.mat','mle_eta')
    else
        save('./Data/mle_eta.mat','mle_eta')
    end
end